library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.1 ✔ stringr 1.4.1
## ✔ readr 2.1.2 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(easystats)
## # Attaching packages: easystats 0.5.2 (red = needs update)
## ✖ insight 0.18.4 ✖ datawizard 0.6.2
## ✔ bayestestR 0.13.0 ✔ performance 0.10.0
## ✔ parameters 0.19.0 ✖ effectsize 0.7.0.5
## ✔ modelbased 0.8.5 ✖ correlation 0.8.2
## ✔ see 0.7.3 ✔ report 0.5.5
##
## Restart the R-Session and update packages in red with `easystats::easystats_update()`.
library(dplyr)
library(kableExtra)
##
## Attaching package: 'kableExtra'
##
## The following object is masked from 'package:dplyr':
##
## group_rows
library(DT)
library(modelr)
##
## Attaching package: 'modelr'
##
## The following objects are masked from 'package:performance':
##
## mae, mse, rmse
# Read the graduate-admissions data (columns: admit, gre, gpa, rank).
df <- read_csv("./data/GradSchool_Admissions.csv")
## Rows: 400 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (4): admit, gre, gpa, rank
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Show the first six rows as a styled HTML table.
df %>%
  head() %>%
  kable() %>%
  kable_classic(lightable_options = 'hover')
| admit | gre | gpa | rank |
|---|---|---|---|
| 0 | 380 | 3.61 | 3 |
| 1 | 660 | 3.67 | 3 |
| 1 | 800 | 4.00 | 1 |
| 1 | 640 | 3.19 | 4 |
| 0 | 520 | 2.93 | 4 |
| 1 | 760 | 3.00 | 2 |
# Flag admitted applicants: TRUE when admit == 1, FALSE otherwise
# (missing = FALSE keeps the original case_when's NA -> FALSE mapping).
datfram <- df %>%
  mutate(admitancetf = if_else(admit == 1, TRUE, FALSE, missing = FALSE))
# Loess-smoothed GPA vs GRE trends, colored by admission status; rendered
# as an interactive plotly widget.
p1 <- ggplot(datfram, aes(x = gre, y = gpa, color = admitancetf)) +
  geom_smooth(se = FALSE) +
  theme_minimal()
plotly::ggplotly(p1)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
# 2-D density contours of GRE score against school rank, by admission
# status; rendered as an interactive plotly widget.
p <- ggplot(datfram, aes(x = gre, y = rank, color = admitancetf)) +
  geom_density2d() +
  theme_minimal()
plotly::ggplotly(p)
# Interactive, filterable data table (5 rows per page, horizontal scroll).
# Use TRUE, not T: T is an ordinary variable that can be reassigned.
datatable(
  datfram,
  rownames = FALSE,
  filter = "top",
  options = list(pageLength = 5, scrollX = TRUE)
)
# Pairwise plot matrix (scatterplots, densities, correlations) for all
# columns of the data, to eyeball relationships before modeling.
GGally::ggpairs(datfram)
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Candidate logistic regressions of admission, from simplest to most
# complex: single predictor, additive terms, and a GRE x GPA interaction.
mod1 <- glm(admitancetf ~ gre, data = datfram, family = "binomial")
mod2 <- glm(admitancetf ~ gre + gpa, data = datfram, family = "binomial")
mod3 <- glm(admitancetf ~ gre + gpa + rank, data = datfram, family = "binomial")
mod4 <- glm(admitancetf ~ gre * gpa, data = datfram, family = "binomial")
mod5 <- glm(admitancetf ~ gre * gpa + rank, data = datfram, family = "binomial")
# Compare fit indices across all candidate models, then rank them.
plot(compare_performance(mod1, mod2, mod3, mod4, mod5))
compare_performance(mod1, mod2, mod3, mod4, mod5, rank = TRUE)
## # Comparison of Model Performance Indices
##
## Name | Model | Tjur's R2 | RMSE | Sigma | Log_loss | Score_log | Score_spherical | PCP | AIC weights | BIC weights | Performance-Score
## -----------------------------------------------------------------------------------------------------------------------------------------
## mod5 | glm | 0.105 | 0.440 | 1.075 | 0.571 | -50.481 | 0.004 | 0.612 | 0.604 | 0.172 | 65.59%
## mod3 | glm | 0.099 | 0.442 | 1.077 | 0.574 | -50.496 | 0.004 | 0.609 | 0.396 | 0.827 | 64.60%
## mod4 | glm | 0.052 | 0.453 | 1.098 | 0.597 | -48.633 | 0.004 | 0.589 | 3.99e-05 | 8.33e-05 | 39.47%
## mod2 | glm | 0.047 | 0.455 | 1.100 | 0.600 | -48.679 | 0.004 | 0.587 | 3.11e-05 | 4.78e-04 | 34.95%
## mod1 | glm | 0.034 | 0.458 | 1.105 | 0.608 | -48.247 | 0.003 | 0.581 | 4.86e-06 | 5.50e-04 | 22.23%
# Overlay mod5's predicted admission probabilities (black) on the observed
# 0/1 outcomes (red) against GRE score.
datfram %>%
  add_predictions(mod5, type = "response") %>%
  ggplot(aes(x = gre, y = pred)) +
  geom_point(color = "black") +
  geom_point(aes(y = admit), alpha = 0.5, color = "red")
# NOTE: plotting binary outcomes against continuous predictions is hard to
# read; binned proportions or jittered points might display this better.
# The predicted values are the fitted probabilities of admission.